1. Over–Under Analysis

library(data.table)
library(anytime)
library(stringr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(jpeg)
library(fields)
## Loading required package: spam
## Loading required package: dotCall64
## Loading required package: grid
## Spam version 2.2-0 (2018-06-19) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction 
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
## 
## Attaching package: 'spam'
## The following objects are masked from 'package:base':
## 
##     backsolve, forwardsolve
## Loading required package: maps
## See www.image.ucar.edu/~nychka/Fields for
##  a vignette and other supplements.
library(formatR)
# Load the odd-details table, convert to data.table and parse epoch dates
oo = readRDS("/Users/ilkerkurtulus/Documents/cse-master/ie582/data/hw1/df9b1196-e3cf-4cc7-9159-f236fe738215_odd_details.rds")
oo = data.table(oo)
oo$date = anytime(oo$date)
# Load the matches table
mm = readRDS("/Users/ilkerkurtulus/Documents/cse-master/ie582/data/hw1/df9b1196-e3cf-4cc7-9159-f236fe738215_matches.rds")
mm = data.table(mm)
# Drop columns not used in this analysis
mm[, `:=`(c("leagueId", "type"), NULL)]
mm$date = anytime(mm$date)
# Keep only matches that have a recorded score
mm = mm[is.na(mm$score) == FALSE]
# Split "home:away" score strings into two columns
mm = mm[, `:=`(c("home_goal", "away_goal"), tstrsplit(score, 
    ":", fixed = TRUE))]
mm$home_goal = as.numeric(mm$home_goal)
mm$away_goal = as.numeric(mm$away_goal)
mm[, `:=`(total_goals, home_goal + away_goal)]
# Binary target: 1 if the match went over 2.5 total goals, else 0
mm = mutate(mm, is_over = ifelse(total_goals > 2.5, 1, 0))
# mutate() returns a plain data.frame, so convert back to data.table
mm = data.table(mm)
head(mm)
##     matchId        home            away score                date
## 1: KjF6FiA6   tottenham manchester city   0:0 2010-08-14 15:45:00
## 2: ILVbJgQm aston villa        west ham   3:0 2010-08-14 18:00:00
## 3: SGIEDVvJ      wolves      stoke city   2:1 2010-08-14 18:00:00
## 4: YwL5xFHJ      bolton          fulham   0:0 2010-08-14 18:00:00
## 5: lQJAEBPC       wigan       blackpool   0:4 2010-08-14 18:00:00
## 6: byRcHDuf  sunderland      birmingham   2:2 2010-08-14 18:00:00
##    home_goal away_goal total_goals is_over
## 1:         0         0           0       0
## 2:         3         0           3       1
## 3:         2         1           3       1
## 4:         0         0           0       0
## 5:         0         4           4       1
## 6:         2         2           4       1

Different handicap values define different events with different probabilities, so we should not consider them together. That is why, from the “ah” betType, I will pick totalhandicap = 0, and for the “ou” betType let's pick 2.5.

While choosing bookmakers, we need to make sure that the bookmakers provide odds for the handicap values and bet types above. To do that, let's print them and choose:

# Count over/under (2.5) odd records per bookmaker to see market coverage
oo[(betType == "ou") & (totalhandicap == "2.5"), .N, by = bookmaker]
##            bookmaker     N
##  1:            1xBet 10187
##  2:      bet-at-home 11838
##  3:           bet365 13809
##  4:          Betclic 12011
##  5:        BetVictor 14063
##  6:           Betway 11041
##  7:             bwin 11528
##  8:           Expekt  9664
##  9:      Paddy Power 12492
## 10:           Unibet 10905
## 11:     William Hill  5494
## 12:           youwin 11280
## 13:          Betsafe 14853
## 14:          Betsson 15276
## 15:      Sportingbet 10672
## 16:           Tipico 10046
## 17:         Pinnacle 14610
## 18:            10Bet 36443
## 19:            12BET 11148
## 20:           188BET 12306
## 21:           ComeOn 11497
## 22:           SBOBET 10418
## 23:      Interwetten  7279
## 24:         888sport  7319
## 25:          Betfair  6219
## 26: Betfair Exchange 14914
##            bookmaker     N
# Count asian-handicap (0) odd records per bookmaker to see market coverage
oo[(betType == "ah") & (totalhandicap == "0"), .N, by = bookmaker]
##            bookmaker     N
##  1:            1xBet 10584
##  2:           bet365 14846
##  3:      Interwetten  7285
##  4:           Unibet  5283
##  5:        BetVictor  6863
##  6:      Paddy Power  4972
##  7:         Pinnacle  9853
##  8: Betfair Exchange 12309
##  9:            10Bet  9758
## 10:           188BET  6847
## 11:           ComeOn  5745
## 12:            12BET  5796
## 13:           SBOBET  5568
## 14:          Betsson   322
## 15:          Betsafe    44
## 16:           youwin    60
## 17:           Expekt    30

So we can select 5 bookmakers as 1xBet, bet365, Betfair Exchange, Pinnacle and 10Bet

# Bookmakers that cover both the ah(0) and ou(2.5) markets well
bookmakers <- c(
    "1xBet",
    "bet365",
    "Betfair Exchange",
    "Pinnacle",
    "10Bet"
)
func_1a = function(n_bm) {
    # PCA analysis of one bookmaker's odds against the over/under
    # (2.5 goals) outcome.
    #   n_bm: index into the global `bookmakers` vector.
    # Reads globals: oo (odd details), mm (matches with is_over), bookmakers.
    # Side effects: prints the PCA summary and draws two plots.
    df = oo[bookmaker == bookmakers[n_bm]]
    # Wide table of the remaining bet types (everything except ou/ah):
    # one row per match, one column per betType_oddtype, mean odd per cell
    pdf_1 = dcast(df[(betType != "ou") & (betType != "ah")], 
        matchId + bookmaker ~ betType + oddtype, value.var = c("odd"), 
        fun.aggregate = mean)
    
    # only choose ah = 0 due to different handicap means
    # different odds, so its not useful to mix different
    # handicaps
    x = df[(betType == "ah") & (totalhandicap == "0")]
    pdf_2 = dcast(x, matchId ~ betType + oddtype, value.var = c("odd"), 
        fun.aggregate = mean)
    # only choose ou = 2.5 due to different handicap means
    # different odds, so its not useful to mix different
    # handicaps
    x = df[(betType == "ou") & (totalhandicap == "2.5")]
    pdf_3 = dcast(x, matchId ~ betType + oddtype, value.var = c("odd"), 
        fun.aggregate = mean)
    # Join the three odd tables on matchId; keep complete cases only
    pdf = na.omit(pdf_1[pdf_2, on = "matchId"][pdf_3, on = "matchId"])
    
    # Attach the over/under label and drop rows with missing odds
    all_df = na.omit(pdf[mm[, c("matchId", "is_over")], on = "matchId"])
    all_df = all_df[, `:=`(is_over, as.character(is_over))]
    # Standardize the 7 odd columns. NOTE: scaled_df already has mean 0 /
    # sd 1, so center/scale in prcomp below are redundant but harmless.
    scaled_df = scale(all_df[, 3:9])
    pca = prcomp(scaled_df, center = TRUE, scale. = TRUE)
    print(summary(pca))
    eigs = pca$sdev^2
    exp_var_ratio = eigs/sum(eigs)
    cum_exp_var_ratio = cumsum(exp_var_ratio)
    
    # FIX: axis label typo "Principle" -> "Principal"
    plot(cum_exp_var_ratio, type = "l", xlab = "# of Principal Components", 
        ylab = "Cumulative Explained Variance")
    title(paste("Cumulative Explained Variance Ratio of PCA for ", 
        bookmakers[n_bm], sep = ""))
    
    # Project onto the first two components and colour points by outcome
    all_pca = predict(pca, newdata = scaled_df)
    all_pca2d = all_pca[, 1:2]
    all_pca2d = data.table(all_pca2d)
    all_pca2d[, `:=`(is_over, all_df$is_over)]
    ggplot(all_pca2d, aes(x = PC1, y = PC2, color = is_over)) + 
        geom_point() + ggtitle(paste("Transformed Data with p = 2 PCA and is_over results", 
        bookmakers[n_bm], sep = " "))
}
# Run the over/under PCA analysis for bookmaker 1 ("1xBet")
func_1a(1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     1.8620 1.4220 1.1528 0.31994 0.25363 0.09678
## Proportion of Variance 0.4953 0.2889 0.1898 0.01462 0.00919 0.00134
## Cumulative Proportion  0.4953 0.7842 0.9740 0.98863 0.99782 0.99916
##                            PC7
## Standard deviation     0.07657
## Proportion of Variance 0.00084
## Cumulative Proportion  1.00000

func_1a(2)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     1.7096 1.5209 1.2876 0.21767 0.20100 0.12273
## Proportion of Variance 0.4175 0.3304 0.2369 0.00677 0.00577 0.00215
## Cumulative Proportion  0.4175 0.7480 0.9848 0.99161 0.99738 0.99953
##                            PC7
## Standard deviation     0.05739
## Proportion of Variance 0.00047
## Cumulative Proportion  1.00000

func_1a(3)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     1.7306 1.3078 1.2174 0.65847 0.53327 0.27811
## Proportion of Variance 0.4279 0.2443 0.2117 0.06194 0.04063 0.01105
## Cumulative Proportion  0.4279 0.6722 0.8839 0.94584 0.98647 0.99752
##                            PC7
## Standard deviation     0.13175
## Proportion of Variance 0.00248
## Cumulative Proportion  1.00000

func_1a(4)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     1.8741 1.6091 0.8743 0.28487 0.16779 0.12440
## Proportion of Variance 0.5018 0.3699 0.1092 0.01159 0.00402 0.00221
## Cumulative Proportion  0.5018 0.8717 0.9809 0.99246 0.99648 0.99869
##                            PC7
## Standard deviation     0.09562
## Proportion of Variance 0.00131
## Cumulative Proportion  1.00000

func_1a(5)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6
## Standard deviation     1.9234 1.5186 0.71947 0.61145 0.23300 0.17029
## Proportion of Variance 0.5285 0.3295 0.07395 0.05341 0.00776 0.00414
## Cumulative Proportion  0.5285 0.8579 0.93188 0.98529 0.99305 0.99719
##                            PC7
## Standard deviation     0.14018
## Proportion of Variance 0.00281
## Cumulative Proportion  1.00000

Using the first 2 components is easy to visualize; however, in terms of explaining the variance of the data it is not ideal. Yet in our case, except for bet365, all data tables have an explained variance ratio higher than 0.70, so 2D is a reasonable choice for visualization.

When we look at the scatter plots, we cannot find a clear distinction between the over/under results. This is caused by the selection of p = 2 in PCA, and by the behavior of the data (i.e. nonlinearity), which is not well suited to PCA.

Calculation of manhattan and euclidian distances as well as 2D and 3D of them with MDS:

# Bookmakers that cover both the ah(0) and ou(2.5) markets well
bookmakers <- c(
    "1xBet",
    "bet365",
    "Betfair Exchange",
    "Pinnacle",
    "10Bet"
)
func_1bman = function(n_bm) {
    # MDS analysis (Manhattan + Euclidean distances, k = 2) of one
    # bookmaker's odds, coloured by the over/under outcome.
    #   n_bm: index into the global `bookmakers` vector.
    # Reads globals: oo, mm, bookmakers. Side effect: draws two stacked plots.
    df = oo[bookmaker == bookmakers[n_bm]]
    # Wide table of the remaining bet types (everything except ou/ah)
    pdf_1 = dcast(df[(betType != "ou") & (betType != "ah")], 
        matchId + bookmaker ~ betType + oddtype, value.var = c("odd"), 
        fun.aggregate = mean)
    
    # only choose ah = 0: different handicaps are different events
    x = df[(betType == "ah") & (totalhandicap == "0")]
    pdf_2 = dcast(x, matchId ~ betType + oddtype, value.var = c("odd"), 
        fun.aggregate = mean)
    # only choose ou = 2.5 due to different handicap means
    # different odds, so its not useful to mix different
    # handicaps
    x = df[(betType == "ou") & (totalhandicap == "2.5")]
    pdf_3 = dcast(x, matchId ~ betType + oddtype, value.var = c("odd"), 
        fun.aggregate = mean)
    
    # Join the three odd tables and attach the over/under label
    pdf = na.omit(pdf_1[pdf_2, on = "matchId"][pdf_3, on = "matchId"])
    all_df = na.omit(pdf[mm[, c("matchId", "is_over")], on = "matchId"])
    
    all_df = all_df[, `:=`(is_over, as.character(is_over))]
    scaled_df = scale(all_df[, 3:9])
    dist_man = dist(scaled_df, method = "manhattan")
    mds_man2 = data.table(cmdscale(dist_man, eig = TRUE, k = 2)$points)
    mds_man2[, `:=`(is_over, all_df$is_over)]
    # FIX: was method = "euclidian" — not a valid stats::dist method name
    # ("euclidian" does not prefix-match "euclidean", so dist() stops with
    # "invalid distance method")
    dist_euc = dist(scaled_df, method = "euclidean")
    mds_euc2 = data.table(cmdscale(dist_euc, eig = TRUE, k = 2)$points)
    mds_euc2[, `:=`(is_over, all_df$is_over)]
    p1 = qplot(data = mds_euc2, V1, V2, color = is_over) + ggtitle(paste("MDS Euclidian and is_over results for", 
        bookmakers[n_bm], sep = " "))
    p2 = qplot(data = mds_man2, V1, V2, color = is_over) + ggtitle(paste("MDS Manhattan and is_over results for", 
        bookmakers[n_bm], sep = " "))
    grid.arrange(p1, p2)
}
# MDS (Manhattan + Euclidean) visualisations for each of the five bookmakers
func_1bman(1)

func_1bman(2)

func_1bman(3)

func_1bman(4)

func_1bman(5)

Again, for MDS it is really hard to say that the classes are separated well. The graphs for Manhattan and Euclidean distances do differ, because one measures the city-block path while the other measures the shortest (straight-line) path in R^n. That is why the Manhattan distance makes the distance matrix larger and spreads the observations over the space at a larger scale, as in the graph for bet365.

Even though we cannot observe a clear distinction with either MDS or PCA, MDS is most probably a better choice for this data because it can handle nonlinearity. When we compare the plots in 1a and 1b for each bookmaker, the plots look more convex for MDS, which reflects that nonlinearity.

2. 1x2 Analysis

Feature engineering for this part:

# Bookmakers that cover both the ah(0) and ou(2.5) markets well
bookmakers <- c(
    "1xBet",
    "bet365",
    "Betfair Exchange",
    "Pinnacle",
    "10Bet"
)
func_2a = function(n_bm) {
    # PCA analysis of one bookmaker's odds against the 1x2 match result.
    #   n_bm: index into the global `bookmakers` vector.
    # Reads globals: oo, mm, bookmakers.
    # Side effects: prints the PCA summary, draws two plots, and — because
    # data.table `:=` modifies by reference — adds the is_1x2 column to the
    # GLOBAL mm table.
    df = oo[bookmaker == bookmakers[n_bm]]
    # Wide table of the remaining bet types (everything except ou/ah)
    pdf_1 = dcast(df[(betType != "ou") & (betType != "ah")], 
        matchId + bookmaker ~ betType + oddtype, value.var = c("odd"), 
        fun.aggregate = mean)
    
    # only choose ah = 0: different handicaps are different events
    x = df[(betType == "ah") & (totalhandicap == "0")]
    pdf_2 = dcast(x, matchId ~ betType + oddtype, value.var = c("odd"), 
        fun.aggregate = mean)
    # only choose ou = 2.5 due to different handicap means
    # different odds, so its not useful to mix different
    # handicaps
    x = df[(betType == "ou") & (totalhandicap == "2.5")]
    pdf_3 = dcast(x, matchId ~ betType + oddtype, value.var = c("odd"), 
        fun.aggregate = mean)
    
    pdf = na.omit(pdf_1[pdf_2, on = "matchId"][pdf_3, on = "matchId"])
    
    # 1x2 label: "1" home win, "x" draw, "2" away win (mutates global mm)
    mm[, `:=`(is_1x2, ifelse(home_goal > away_goal, "1", ifelse(home_goal == 
        away_goal, "x", "2")))]
    all_1x2 = na.omit(pdf[mm[, c("matchId", "is_1x2")], on = "matchId"])
    # NOTE: scaled_df already has mean 0 / sd 1, so center/scale in prcomp
    # below are redundant but harmless
    scaled_df = scale(all_1x2[, 3:9])
    pca = prcomp(scaled_df, center = TRUE, scale. = TRUE)
    print(summary(pca))
    eigs = pca$sdev^2
    exp_var_ratio = eigs/sum(eigs)
    cum_exp_var_ratio = cumsum(exp_var_ratio)
    
    # FIX: axis label typo "Principle" -> "Principal"
    plot(cum_exp_var_ratio, type = "l", xlab = "# of Principal Components", 
        ylab = "Cumulative Explained Variance")
    title(paste("1x2 Results - Cumulative Explained Variance Ratio of PCA for ", 
        bookmakers[n_bm], sep = ""))
    
    # Project onto the first two components and colour by 1x2 result
    all_pca = predict(pca, newdata = scaled_df)
    all_pca2d = all_pca[, 1:2]
    all_pca2d = data.table(all_pca2d)
    all_pca2d[, `:=`(is_1x2, all_1x2$is_1x2)]
    ggplot(all_pca2d, aes(x = PC1, y = PC2, color = is_1x2)) + 
        geom_point() + ggtitle(paste("Transformed Data with p = 2 PCA and is_1x2 results", 
        bookmakers[n_bm], sep = " "))
}
# Run the 1x2 PCA analysis for bookmaker 1 ("1xBet")
func_2a(1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     1.8620 1.4220 1.1528 0.31994 0.25363 0.09678
## Proportion of Variance 0.4953 0.2889 0.1898 0.01462 0.00919 0.00134
## Cumulative Proportion  0.4953 0.7842 0.9740 0.98863 0.99782 0.99916
##                            PC7
## Standard deviation     0.07657
## Proportion of Variance 0.00084
## Cumulative Proportion  1.00000

func_2a(2)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     1.7096 1.5209 1.2876 0.21767 0.20100 0.12273
## Proportion of Variance 0.4175 0.3304 0.2369 0.00677 0.00577 0.00215
## Cumulative Proportion  0.4175 0.7480 0.9848 0.99161 0.99738 0.99953
##                            PC7
## Standard deviation     0.05739
## Proportion of Variance 0.00047
## Cumulative Proportion  1.00000

func_2a(3)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     1.7306 1.3078 1.2174 0.65847 0.53327 0.27811
## Proportion of Variance 0.4279 0.2443 0.2117 0.06194 0.04063 0.01105
## Cumulative Proportion  0.4279 0.6722 0.8839 0.94584 0.98647 0.99752
##                            PC7
## Standard deviation     0.13175
## Proportion of Variance 0.00248
## Cumulative Proportion  1.00000

func_2a(4)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     1.8741 1.6091 0.8743 0.28487 0.16779 0.12440
## Proportion of Variance 0.5018 0.3699 0.1092 0.01159 0.00402 0.00221
## Cumulative Proportion  0.5018 0.8717 0.9809 0.99246 0.99648 0.99869
##                            PC7
## Standard deviation     0.09562
## Proportion of Variance 0.00131
## Cumulative Proportion  1.00000

func_2a(5)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6
## Standard deviation     1.9234 1.5186 0.71947 0.61145 0.23300 0.17029
## Proportion of Variance 0.5285 0.3295 0.07395 0.05341 0.00776 0.00414
## Cumulative Proportion  0.5285 0.8579 0.93188 0.98529 0.99305 0.99719
##                            PC7
## Standard deviation     0.14018
## Proportion of Variance 0.00281
## Cumulative Proportion  1.00000

Using the first 2 components is easy to visualize; however, in terms of explaining the variance of the data it is not ideal. For the 1x2 case we can clearly see a pattern when classifying the 1x2 result; however, for some bookmakers, such as 10Bet, this pattern cannot be observed. With that intuition, we need to do deeper research, such as increasing the number of dimensions in PCA or using MDS.

# Bookmakers that cover both the ah(0) and ou(2.5) markets well
bookmakers <- c(
    "1xBet",
    "bet365",
    "Betfair Exchange",
    "Pinnacle",
    "10Bet"
)
func_2bman = function(n_bm) {
    # MDS analysis (Manhattan + Euclidean distances, k = 2) of one
    # bookmaker's odds, coloured by the 1x2 match result.
    #   n_bm: index into the global `bookmakers` vector.
    # Reads globals: oo, mm, bookmakers. Side effects: draws two stacked
    # plots and (via data.table `:=`) adds is_1x2 to the GLOBAL mm table.
    df = oo[bookmaker == bookmakers[n_bm]]
    # Wide table of the remaining bet types (everything except ou/ah)
    pdf_1 = dcast(df[(betType != "ou") & (betType != "ah")], 
        matchId + bookmaker ~ betType + oddtype, value.var = c("odd"), 
        fun.aggregate = mean)
    
    # only choose ah = 0: different handicaps are different events
    x = df[(betType == "ah") & (totalhandicap == "0")]
    pdf_2 = dcast(x, matchId ~ betType + oddtype, value.var = c("odd"), 
        fun.aggregate = mean)
    # only choose ou = 2.5 due to different handicap means
    # different odds, so its not useful to mix different
    # handicaps
    x = df[(betType == "ou") & (totalhandicap == "2.5")]
    pdf_3 = dcast(x, matchId ~ betType + oddtype, value.var = c("odd"), 
        fun.aggregate = mean)
    
    pdf = na.omit(pdf_1[pdf_2, on = "matchId"][pdf_3, on = "matchId"])
    # 1x2 label: "1" home win, "x" draw, "2" away win (mutates global mm)
    mm[, `:=`(is_1x2, ifelse(home_goal > away_goal, "1", ifelse(home_goal == 
        away_goal, "x", "2")))]
    all_1x2 = na.omit(pdf[mm[, c("matchId", "is_1x2")], on = "matchId"])
    
    all_1x2 = all_1x2[, `:=`(is_1x2, as.character(is_1x2))]
    
    scaled_df = scale(all_1x2[, 3:9])
    dist_man = dist(scaled_df, method = "manhattan")
    mds_man2 = data.table(cmdscale(dist_man, eig = TRUE, k = 2)$points)
    mds_man2[, `:=`(is_1x2, all_1x2$is_1x2)]
    # FIX: was method = "euclidian" — not a valid stats::dist method name
    # ("euclidian" does not prefix-match "euclidean", so dist() stops with
    # "invalid distance method")
    dist_euc = dist(scaled_df, method = "euclidean")
    mds_euc2 = data.table(cmdscale(dist_euc, eig = TRUE, k = 2)$points)
    mds_euc2[, `:=`(is_1x2, all_1x2$is_1x2)]
    p1 = qplot(data = mds_euc2, V1, V2, color = is_1x2) + ggtitle(paste("MDS Euclidian and is_1x2 results for", 
        bookmakers[n_bm], sep = " "))
    p2 = qplot(data = mds_man2, V1, V2, color = is_1x2) + ggtitle(paste("MDS Manhattan and is_1x2 results for", 
        bookmakers[n_bm], sep = " "))
    grid.arrange(p1, p2)
}
# MDS (Manhattan + Euclidean) 1x2 visualisations for each of the five bookmakers
func_2bman(1)

func_2bman(2)

func_2bman(3)

func_2bman(4)

func_2bman(5)

In the MDS approach, as with PCA, we can again see a pattern separating classes 1 and 2; however, for 10Bet it is not clear again, so MDS did not help us there. We would need to change the number of dimensions to see whether there is a pattern or not. For the distance metric, we can observe the separation mostly with Manhattan, so Manhattan is a better choice for this model.

Comparing MDS to PCA, MDS made better dimension reduction for this model as we can see from graphs (1xBet, bet365). This is caused by expanding initial data to higher dimensional distance metric gives us extra information about the data so that explained variance. Except 10Bet, we can see patterns in graphs that can give us an insight when classifying the 1x2 outcome of a match with unsupervised approach.

3. Image Analysis

  1. Read Image
# Read the RGB image as a 512 x 512 x 3 array of values in [0, 1]
img = readJPEG("/Users/ilkerkurtulus/Downloads/IMG_7955.jpg")
str(img)
##  num [1:512, 1:512, 1:3] 0.424 0.4 0.388 0.404 0.4 ...
dim(img)
## [1] 512 512   3
# Display the full-colour image
if (exists("rasterImage")) {
    plot(1:2, type = "n")
    rasterImage(img, 1, 1, 2, 2)
}

# Display the three colour channels (R, G, B) side by side as greyscale
if (exists("rasterImage")) {
    plot(1:4, type = "n")
    rasterImage(img[, , 1], 1, 1, 2, 2)
    rasterImage(img[, , 2], 2, 1, 3, 2)
    rasterImage(img[, , 3], 3, 1, 4, 2)
}

# Add uniform [0, 1] noise to every pixel/channel, then rescale the sum
# back into [0, 1] (rasterImage requires values in that range)
noise = runif(512 * 512 * 3, min = 0, max = 1)
dim(noise) = c(512, 512, 3)
nimg = noise + img
nimg_scaled = (nimg - min(nimg))/(max(nimg) - min(nimg))
if (exists("rasterImage")) {
    plot(1:2, type = "n")
    rasterImage(nimg_scaled, 1, 1, 2, 2)
}

# Channels of the noisy image
if (exists("rasterImage")) {
    plot(1:4, type = "n")
    rasterImage(nimg_scaled[, , 1], 1, 1, 2, 2)
    rasterImage(nimg_scaled[, , 2], 2, 1, 3, 2)
    rasterImage(nimg_scaled[, , 3], 3, 1, 4, 2)
}

  1. Define greyscale function according to luminosity type:
# Convert an RGB image array (h x w x 3) to greyscale using the
# luminosity method: grey = 0.21*R + 0.72*G + 0.07*B.
# Returns an h x w matrix.
to_greyscale = function(img) {
    red_w <- 0.21
    green_w <- 0.72
    blue_w <- 0.07
    red_w * img[, , 1] + green_w * img[, , 2] + blue_w * img[, , 3]
}
# Greyscale the noisy image and run PCA treating its 512 columns as features
grey_nimg = to_greyscale(nimg_scaled)
pca_img = prcomp(grey_nimg, center = TRUE, scale. = TRUE)
# Component variances (eigenvalues) are the squared singular values
eigs = pca_img$sdev^2
exp_var_ratio = eigs/sum(eigs)
cum_exp_var_ratio = cumsum(exp_var_ratio)
plot(cum_exp_var_ratio)

Even though the image has 512 dimensions, we can explain it well with about 250 components at a 0.95 cumulative explained variance ratio. However, that is still a very large dimension for statistical inference. b) Construct patch matrix:

# Extract the n x n patch centred at (i, j) from a matrix and return it
# as a row-major vector of length n^2 (n must be odd; the patch must lie
# fully inside the matrix).
extract_path = function(data, i, j, n) {
    half = (n - 1)/2
    rows = (i - half):(i + half)
    cols = (j - half):(j + half)
    patch = data[rows, cols]
    as.vector(t(patch))
}
# Build the patch matrix: one row per interior pixel (510 x 510 = 260100
# rows), each row a flattened 3x3 neighbourhood of grey_nimg.
# FIX: the original grew `tmp` with c() inside the nested loop, copying
# the whole vector on every append (accidental O(n^2) work for 260100
# patches). Preallocate the result matrix and fill rows in place instead;
# the resulting `res` is identical.
res = matrix(0, nrow = 510 * 510, ncol = 9)
k = 1
for (i in 2:511) {
    for (j in 2:511) {
        res[k, ] = extract_path(grey_nimg, i, j, 3)
        k = k + 1
    }
}

Apply pca and plot:

# PCA over the patch matrix (260100 patches x 9 pixels each)
pca_res = prcomp(res, center = TRUE, scale. = TRUE)

# 1st component
# Reshape the component scores back into the 510 x 510 interior-pixel
# grid (byrow = TRUE matches the row-major order the patches were built in)
pca_res1d = pca_res$x[, 1]
pca_mat = matrix(pca_res1d, nrow = 510, ncol = 510, byrow = TRUE)
# Rescale scores into [0, 1] so rasterImage can draw them
pca_mat_scaled = (pca_mat - min(pca_mat))/(max(pca_mat) - min(pca_mat))
if (exists("rasterImage")) {
    plot(1:2, type = "n")
    rasterImage(pca_mat_scaled, 1, 1, 2, 2)
}

# 2nd component
pca_res1d = pca_res$x[, 2]
pca_mat = matrix(pca_res1d, nrow = 510, ncol = 510, byrow = TRUE)
pca_mat_scaled = (pca_mat - min(pca_mat))/(max(pca_mat) - min(pca_mat))
if (exists("rasterImage")) {
    plot(1:2, type = "n")
    rasterImage(pca_mat_scaled, 1, 1, 2, 2)
}

# 3rd component
pca_res1d = pca_res$x[, 3]
pca_mat = matrix(pca_res1d, nrow = 510, ncol = 510, byrow = TRUE)
pca_mat_scaled = (pca_mat - min(pca_mat))/(max(pca_mat) - min(pca_mat))
if (exists("rasterImage")) {
    plot(1:2, type = "n")
    rasterImage(pca_mat_scaled, 1, 1, 2, 2)
}

# Visualise the first three eigenvectors (loadings) as 3x3 "eigen-patches",
# each rescaled to [0, 1] for display
ev1 = pca_res$rotation[, 1]
ev_scaled1 = (ev1 - min(ev1))/(max(ev1) - min(ev1))
mat_ev1 = matrix(ev_scaled1, nrow = 3, ncol = 3, byrow = TRUE)

ev2 = pca_res$rotation[, 2]
ev_scaled2 = (ev2 - min(ev2))/(max(ev2) - min(ev2))
mat_ev2 = matrix(ev_scaled2, nrow = 3, ncol = 3, byrow = TRUE)

ev3 = pca_res$rotation[, 3]
ev_scaled3 = (ev3 - min(ev3))/(max(ev3) - min(ev3))
mat_ev3 = matrix(ev_scaled3, nrow = 3, ncol = 3, byrow = TRUE)


# Draw the three eigen-patches side by side
if (exists("rasterImage")) {
    plot(1:4, type = "n")
    rasterImage(mat_ev1, 1, 1, 2, 2)
    rasterImage(mat_ev2, 2, 1, 3, 2)
    rasterImage(mat_ev3, 3, 1, 4, 2)
}